Hits identification

Initialization

We start the analysis by initializing the packages required for all the analysis performed in this section. We also define the root directory, within which all the input/output operations for this project will be performed. At the end of this document, detailed software version information is provided for easier reproducibility of the analysis.


Hits identification

Here we show an interactive way to access the normalized screening data. The red, grey and green horizontal lines mark the 95th percentile, the median and the 5th percentile of the roGFP2 values, respectively, in each condition.

# Load the normalized screening workspace
normDat = readRDS(paste0(path, "data/workspaces/YeastMutantRedox_NormalizedData_RobustZ.RDS"))

# Per Organelle x Nutrient condition:
#   Rank1 / Rank2 - dense ranks from high-to-low and low-to-high ratio, taken on
#                   the unrounded values
#   MedVal, upperquant, lowerquant - median, 95% and 5% quantiles, taken on the
#                   ratios after rounding them to 2 decimals
# Expressions inside mutate() are evaluated in order, so everything listed after
# the rounding step sees the rounded column.
hits = normDat$redox_table %>%
  group_by(Organelle, Nutrient) %>%
  arrange(desc(Median_roGFP2_ratio)) %>%
  mutate(
    Rank1 = dense_rank(-Median_roGFP2_ratio),
    Rank2 = dense_rank(Median_roGFP2_ratio),
    Median_roGFP2_ratio = round(Median_roGFP2_ratio, 2),
    MedVal = median(Median_roGFP2_ratio),
    upperquant = quantile(Median_roGFP2_ratio, 0.95),
    lowerquant = quantile(Median_roGFP2_ratio, 0.05)
  ) %>%
  ungroup()

# Static rank plot, one panel per Organelle x Nutrient condition
p_print = ggplot(hits, aes(x = Rank1, y = Median_roGFP2_ratio)) +
  theme_bw(base_size = 7) +
  labs(
    x = "Sorted Ranks of mutants (high to low roGFP2 ratios)",
    y = "Normalized by plate median (roGFP2 ratios)"
  ) +
  geom_point(size = 0.3) +
  facet_wrap(Organelle ~ Nutrient, scales = "free", ncol = 6) +
  # reference lines: median, upper (95%) and lower (5%) quantile
  geom_hline(aes(yintercept = MedVal), col = "grey20") +
  geom_hline(aes(yintercept = upperquant), col = "firebrick") +
  geom_hline(aes(yintercept = lowerquant), col = "forestgreen") +
  theme(
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_text(colour = "black"),
    strip.background = element_blank(),
    panel.border = element_rect(size = 0.3),
    panel.grid = element_blank()
  ) +
  # gene names of the 10 best-ranked mutants from either end, all written at
  # the fixed position x = 2500
  geom_text(data = hits[hits$Rank1 <= 10, ], aes(x = 2500, label = Genes), size = 2) +
  geom_text(data = hits[hits$Rank2 <= 10, ], aes(x = 2500, label = Genes), size = 2)

# Write the figure to PDF
pdf(paste0(path, "analysis/tophits/top_and_low_10_hits.pdf"), width = 7.5, height = 2.5)
print(p_print)
invisible(dev.off())


# Interactive version of the rank plot; the `text` aesthetic carries the gene
# name so that it is available to plotly for the points
p_html = ggplot(hits) +
  theme_minimal(base_size = 9) +
  labs(
    x = "Sorted Ranks of mutants (high to low roGFP2 ratios)",
    y = "Normalized by plate median (roGFP2 ratios)"
  ) +
  geom_point(aes(text = Genes, x = Rank1, y = Median_roGFP2_ratio)) +
  facet_wrap(Organelle ~ Nutrient, scales = "free", ncol = 3) +
  # reference lines: median, upper (95%) and lower (5%) quantile
  geom_hline(aes(yintercept = MedVal), col = "grey20") +
  geom_hline(aes(yintercept = upperquant), col = "firebrick") +
  geom_hline(aes(yintercept = lowerquant), col = "forestgreen") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5, vjust = 0.5))

# Convert to a plotly widget and switch it to WebGL rendering
ggplotly(p_html) %>%
  toWebGL()

Hits overlaps

Next we look at the overlaps between the top hits, i.e. the topmost 5% and the lowest 5% of roGFP2 ratios, across the different nutrient conditions and organelles. These correspond to the hits (dots) above the red and below the green horizontal lines in the figure above.

# Temporary helper (removed at the end of this chunk): unique genes of one
# condition table whose median roGFP2 ratio lies strictly beyond the `prob`
# quantile of that table - above it when `upper` is TRUE, below it otherwise.
tailGenes = function(tab, prob, upper){
  ratio = tab$Median_roGFP2_ratio
  cutoff = quantile(ratio, prob)
  beyond = if(upper) ratio > cutoff else ratio < cutoff
  unique(tab$Genes[beyond])
}

# Cytoplasm: one table per nutrient
cyto = normDat$redox_table[normDat$redox_table$Organelle == "Cytoplasm",]
cyto = split(cyto, cyto$Nutrient)

# Mitochondria: one table per nutrient
mito = normDat$redox_table[normDat$redox_table$Organelle == "Mitochondria",]
mito = split(mito, mito$Nutrient)

# *OX = genes above the 95% quantile (high roGFP2 ratio)
cytoOX = lapply(cyto, tailGenes, prob = 0.95, upper = TRUE)
mitoOX = lapply(mito, tailGenes, prob = 0.95, upper = TRUE)

# *RX = genes below the 5% quantile (low roGFP2 ratio)
cytoRX = lapply(cyto, tailGenes, prob = 0.05, upper = FALSE)
mitoRX = lapply(mito, tailGenes, prob = 0.05, upper = FALSE)

# Flat list of hit sets; entry names follow
# <Organelle>_<Nutrient>_<high|low>_roGFP2
topHits = list(
  # high ratios
  Cytoplasm_Glucose_high_roGFP2 = cytoOX$Glucose,
  Cytoplasm_Galactose_high_roGFP2 = cytoOX$Galactose,
  Cytoplasm_Glycerol_high_roGFP2 = cytoOX$Glycerol,
  Mitochondria_Glucose_high_roGFP2 = mitoOX$Glucose,
  Mitochondria_Galactose_high_roGFP2 = mitoOX$Galactose,
  Mitochondria_Glycerol_high_roGFP2 = mitoOX$Glycerol,
  # low ratios
  Cytoplasm_Glucose_low_roGFP2 = cytoRX$Glucose,
  Cytoplasm_Galactose_low_roGFP2 = cytoRX$Galactose,
  Cytoplasm_Glycerol_low_roGFP2 = cytoRX$Glycerol,
  Mitochondria_Glucose_low_roGFP2 = mitoRX$Glucose,
  Mitochondria_Galactose_low_roGFP2 = mitoRX$Galactose,
  Mitochondria_Glycerol_low_roGFP2 = mitoRX$Glycerol
)

rm(tailGenes)

# Rank KEGG gene sets by how much of a shared hit list falls into them.
#
# dat     : list of gene-name vectors; only the genes present in every element
#           (their intersection) are scored.
# Reads the global `genesets$KEGG` (defined outside this section), a collection
# of gene-name vectors whose names are used as the returned labels.
# Returns : names of at most 10 gene sets that contain at least one shared gene,
#           ordered by decreasing percentage of the shared genes they contain;
#           character(0) when the inputs share no gene.
topPaths = function(dat)
{
  a = Reduce(intersect, dat)

  # Without shared genes every percentage would be 0/0 = NaN; report nothing
  # explicitly instead of relying on sort() dropping the NaNs.
  if(length(a) == 0){
    return(character(0))
  }

  # vapply keeps the result a numeric vector even for an empty collection
  gs1 = vapply(genesets$KEGG, function(x){sum(a %in% x)/length(a)*100}, numeric(1))
  gs1 = sort(gs1, decreasing = TRUE)
  gs1 = gs1[gs1 > 0]

  head(names(gs1), 10)
}

# Hits shared by all nutrient conditions of one organelle/direction, and the
# pathway names that topPaths() derives from those shared hits.
# common_hits     : intersection of the per-nutrient hit sets
# common_pathways : topPaths() applied to the same per-nutrient hit sets
topHits_allcarbon = list(
  common_hits = lapply(
    list(
      Cytoplasm_high_roGFP2_commom = cytoOX,
      Cytoplasm_low_roGFP2_commom = cytoRX,
      Mitochondria_high_roGFP2_commom = mitoOX,
      Mitochondria_low_roGFP2_commom = mitoRX
    ),
    function(hitSets) Reduce(intersect, hitSets)
  ),
  common_pathways = lapply(
    list(
      Cytoplasm_high_roGFP2_commom_path = cytoOX,
      Cytoplasm_low_roGFP2_commom_path = cytoRX,
      Mitochondria_high_roGFP2_commom_path = mitoOX,
      Mitochondria_low_roGFP2_commom_path = mitoRX
    ),
    topPaths
  )
)

# Drop the intermediates; only topHits and topHits_allcarbon are kept
rm(cyto, mito, cytoRX, cytoOX, mitoRX, mitoOX, topPaths)

Lowest 5% hits as UpSet plot


Next we show tables of the overlaps between the top hits, i.e. the topmost 5% and the lowest 5% of roGFP2 ratios, across the different nutrient conditions and organelles.

Common hits TOPMOST 5% roGFP2 ratios: Cytoplasm - all nutrient conditions

Common hits LOWEST 5% roGFP2 ratios: Cytoplasm - all nutrient conditions

Common hits TOPMOST 5% roGFP2 ratios: Mitochondria - all nutrient conditions

Common hits LOWEST 5% roGFP2 ratios: Mitochondria - all nutrient conditions

# Build a Venn diagram for the entries of the global `topHits` list selected by
# `index`. The label positions (cat.pos, cat.dist) are given for three sets.
#
# index       : positions in `topHits` of the sets to compare
# col.groups  : colours of the set labels
# col.circles : colours of the circle outlines
# col.main    : colour of the title
# Returns the value of venn.diagram(); with filename = NULL nothing is written
# to an image file and the result is drawn later with grid.draw().
redoxVenn = function(index, col.groups, col.circles, col.main)
{
  # topHits names read <Organelle>_<Nutrient>_<high|low>_roGFP2
  nameParts = strsplit(names(topHits), "_")
  organelle = sapply(nameParts, function(p)p[1])
  nutrient = sapply(nameParts, function(p)p[2])

  venn.diagram(
    x = topHits[index],
    category.names = nutrient[index],   # one label per set: the nutrient
    main = organelle[index[1]],         # title: organelle of the first set
    filename = NULL,

    lwd = 0.7,
    col = col.circles,
    main.col = col.main,
    cex = 0.5,
    fontfamily = "sans",
    main.cex =  0.5,
    cat.cex = 0.5,
    cat.default.pos = "outer",
    cat.pos = c(-30, 30, 180),
    cat.dist = c(0.05, 0.05, 0.05),
    cat.fontfamily = "sans",
    cat.col = col.groups
  )
}

# Cyto - Glu, Gal, Gly - HIGH roGFP2
venn1 = redoxVenn(index = 1:3, col.groups = rep("grey70",3), col.circles = rep("#252525",3), col.main ="#A6611A")

# Mito - Glu, Gal, Gly - HIGH roGFP2
venn2 = redoxVenn(index = 4:6, col.groups = rep("grey70",3), col.circles = rep("#252525",3), col.main ="#7B3294")

# Cyto - Glu, Gal, Gly - LOW roGFP2
venn3 = redoxVenn(index = 7:9, col.groups = rep("grey70",3), col.circles = rep("#bdbdbd",3), col.main ="#A6611A")

# Mito - Glu, Gal, Gly - LOW roGFP2
venn4 = redoxVenn(index = 10:12, col.groups = rep("grey70",3), col.circles = rep("#bdbdbd",3), col.main ="#7B3294")

# Temporary helper: write a column of pathway names into layout cell (row, col).
# `side` is "right" (text right-aligned at x = 0.99) or "left" (left-aligned at
# x = 0.01).
drawPathways = function(paths, row, col, side)
{
  slots = seq(0.9, 0.11, -0.08)   # 10 text lines from top to bottom
  paths = head(paths, length(slots))

  pushViewport(viewport(layout.pos.col = col, layout.pos.row = row))
  # Use exactly one y slot per pathway name. Handing all 10 slots to
  # grid.text() together with a shorter label vector recycles the labels, so a
  # list of fewer than 10 pathways would be printed repeatedly.
  if(length(paths) > 0){
    grid.text(label = paths,
              x = if(side == "right") 0.99 else 0.01,
              y = slots[seq_along(paths)],
              just = side, gp = gpar(fontsize = 6, col = "grey"))
  }
  popViewport()
}

# Temporary helper: draw a venn.diagram() result into layout cell (row, col)
drawVenn = function(venn, row, col)
{
  pushViewport(viewport(layout.pos.col = col, layout.pos.row = row))
  grid.draw(venn)
  popViewport()
}

# PLOT
# 2 x 4 grid: row 1 = high roGFP2, row 2 = low roGFP2; per row:
# cytoplasm pathways | cytoplasm venn | mitochondria venn | mitochondria pathways
pdf(paste0(path, 'analysis/tophits/compartment_specific_venn_top_low_5per.pdf'), width = 3, height = 1.5)

  # init layout
  pushViewport(viewport(layout = grid.layout(nrow = 2, ncol = 4)))

  # row 1: HIGH roGFP2
  drawPathways(topHits_allcarbon$common_pathways$Cytoplasm_high_roGFP2_commom_path,
               row = 1, col = 1, side = "right")
  drawVenn(venn1, row = 1, col = 2)
  drawVenn(venn2, row = 1, col = 3)
  drawPathways(topHits_allcarbon$common_pathways$Mitochondria_high_roGFP2_commom_path,
               row = 1, col = 4, side = "left")

  # row 2: LOW roGFP2
  drawPathways(topHits_allcarbon$common_pathways$Cytoplasm_low_roGFP2_commom_path,
               row = 2, col = 1, side = "right")
  drawVenn(venn3, row = 2, col = 2)
  drawVenn(venn4, row = 2, col = 3)
  drawPathways(topHits_allcarbon$common_pathways$Mitochondria_low_roGFP2_commom_path,
               row = 2, col = 4, side = "left")

  popViewport(1)

  rm(drawPathways, drawVenn, venn1, venn2, venn3, venn4)
invisible(dev.off())
[1] TRUE

Positive control test

Next we tried to validate the screening results by looking at the roGFP2 ratios of mutants known to drastically increase oxidation. We subset a list of genes from the following yeast pathway terms:

  1. response to oxidative stress
  2. oxidoreductase activity
  3. Oxidative phosphorylation
  4. Glutathione metabolism

and selected the core list of genes shown below.

 [1] "MSN2" "MSN4" "YAP1" "SKN7" "GND1" "GND2" "TAL1" "SOL3" "SOL4" "RPE1"
[11] "TKL1" "ALD6" "IDP1" "POS5" "IDP2" "IDP3" "ALD4" "ALD5" "YEF1" "UTR1"
[21] "ZWF1" "MAE1" "GLR1" "GPX1" "GPX2" "GPX3" "GRX1" "GRX2" "GRX3" "GRX4"
[31] "GRX5" "GRX6" "GRX7" "GRX8" "GSH1" "GSH2" "TRX1" "TRX2" "TRX3" "TRR1"
[41] "TRR2" "PRX1" "TSA1" "TSA2" "AHP1" "DOT5" "CTT1" "CTA1" "SOD1" "SOD2"
[51] "CCP1"

Session information

R version 3.6.2 (2019-12-12)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS  10.16

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] DT_0.12             VennDiagram_1.6.20  futile.logger_1.4.3
 [4] venn_1.10           UpSetR_1.4.0        gplots_3.0.1.2     
 [7] ggpubr_0.2.4        magrittr_1.5        ggrepel_0.8.1      
[10] RColorBrewer_1.1-2  plotly_4.9.2        ggplot2_3.2.1      
[13] reshape2_1.4.3      dplyr_0.8.4         rmdformats_0.3.6   
[16] knitr_1.28         

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.3           tidyr_1.0.2          gtools_3.8.1        
 [4] assertthat_0.2.1     digest_0.6.23        mime_0.9            
 [7] R6_2.4.1             plyr_1.8.5           futile.options_1.0.1
[10] evaluate_0.14        httr_1.4.1           pillar_1.4.3        
[13] rlang_0.4.4          lazyeval_0.2.2       data.table_1.12.8   
[16] gdata_2.18.0         rmarkdown_2.1        labeling_0.3        
[19] stringr_1.4.0        htmlwidgets_1.5.1    munsell_0.5.0       
[22] shiny_1.4.0          compiler_3.6.2       httpuv_1.5.5        
[25] xfun_0.12            pkgconfig_2.0.3      htmltools_0.4.0     
[28] tidyselect_1.0.0     tibble_2.1.3         gridExtra_2.3       
[31] bookdown_0.17        viridisLite_0.3.0    crayon_1.3.4        
[34] withr_2.1.2          later_1.0.0          bitops_1.0-6        
[37] jsonlite_1.6.1       xtable_1.8-4         gtable_0.3.0        
[40] lifecycle_0.1.0      formatR_1.7          scales_1.1.0        
[43] KernSmooth_2.23-16   stringi_1.4.5        farver_2.0.3        
[46] ggsignif_0.6.0       promises_1.1.0       ellipsis_0.3.0      
[49] admisc_0.12          vctrs_0.2.2          lambda.r_1.2.4      
[52] tools_3.6.2          Cairo_1.5-11         glue_1.3.1          
[55] purrr_0.3.3          crosstalk_1.0.0      fastmap_1.0.1       
[58] yaml_2.2.1           colorspace_1.4-1     caTools_1.18.0      

Ashwini Kumar Sharma, PhD

2021-05-18